{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import RobustScaler\n",
    "from sklearn.metrics import precision_score, recall_score, accuracy_score\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "default = pd.read_csv('../data/credit_card_default.csv')\n",
    "default.rename(columns=lambda x: x.lower(), inplace=True)\n",
    "default.rename(columns={'pay_0':'pay_1','default payment next month':'default'}, inplace=True)\n",
    "# Base values: female, other_education, not_married\n",
    "default['grad_school'] = (default['education'] == 1).astype('int')\n",
    "default['university'] = (default['education'] == 2).astype('int')\n",
    "default['high_school'] = (default['education'] == 3).astype('int')\n",
    "default.drop('education', axis=1, inplace=True)\n",
    "\n",
    "default['male'] = (default['sex']==1).astype('int')\n",
    "default.drop('sex', axis=1, inplace=True)\n",
    "\n",
    "default['married'] = (default['marriage'] == 1).astype('int')\n",
    "default.drop('marriage', axis=1, inplace=True)\n",
    "\n",
    "# For pay_n features if >0 then it means the customer was delayed on that month\n",
    "pay_features = ['pay_' + str(i) for i in range(1,7)]\n",
    "for p in pay_features:\n",
    "    default[p] = (default[p] > 0).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_name = 'default'\n",
    "X = default.drop('default', axis=1)\n",
    "feature_names = X.columns\n",
    "robust_scaler = RobustScaler()\n",
    "X = robust_scaler.fit_transform(X)\n",
    "y = default[target_name]\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=12, stratify=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 40\n",
    "batch_size = 100 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_placeholder = tf.placeholder(X_train.dtype, shape=X_train.shape)\n",
    "y_placeholder = tf.placeholder(y_train.dtype, shape=y_train.shape)\n",
    "\n",
    "dataset = tf.data.Dataset.from_tensor_slices((X_placeholder, y_placeholder))\n",
    "dataset = dataset.shuffle(buffer_size=10000)\n",
    "dataset = dataset.batch(batch_size)\n",
    "iterator = dataset.make_initializable_iterator()\n",
    "next_element = iterator.get_next()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_inputs = X_train.shape[1] #25\n",
    "n_hidden1 = 200\n",
    "n_hidden2 = 200 \n",
    "n_hidden3 = 200\n",
    "n_outputs = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.placeholder(X_train.dtype, shape=[None,n_inputs])\n",
    "y = tf.placeholder(y_train.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def DNN(X_values):\n",
    "    hidden1 = fully_connected(X_values, n_hidden1, activation_fn=tf.nn.elu)\n",
    "    hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=tf.nn.elu)\n",
    "    hidden3 = fully_connected(hidden2, n_hidden3, activation_fn=tf.nn.elu)\n",
    "    logits = fully_connected(hidden3, n_outputs, activation_fn=None)\n",
    "    return tf.cast(logits, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "logits = DNN(X)\n",
    "cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(cross_entropy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "probs = tf.nn.softmax(logits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = tf.train.AdamOptimizer(learning_rate=0.001)\n",
    "training_op = optimizer.minimize(loss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 1\n",
      "Epoch: 2\n",
      "Epoch: 3\n",
      "Epoch: 4\n",
      "Epoch: 5\n",
      "Epoch: 6\n",
      "Epoch: 7\n",
      "Epoch: 8\n",
      "Epoch: 9\n",
      "Epoch: 10\n",
      "Epoch: 11\n",
      "Epoch: 12\n",
      "Epoch: 13\n",
      "Epoch: 14\n",
      "Epoch: 15\n",
      "Epoch: 16\n",
      "Epoch: 17\n",
      "Epoch: 18\n",
      "Epoch: 19\n",
      "Epoch: 20\n",
      "Epoch: 21\n",
      "Epoch: 22\n",
      "Epoch: 23\n",
      "Epoch: 24\n",
      "Epoch: 25\n",
      "Epoch: 26\n",
      "Epoch: 27\n",
      "Epoch: 28\n",
      "Epoch: 29\n",
      "Epoch: 30\n",
      "Epoch: 31\n",
      "Epoch: 32\n",
      "Epoch: 33\n",
      "Epoch: 34\n",
      "Epoch: 35\n",
      "Epoch: 36\n",
      "Epoch: 37\n",
      "Epoch: 38\n",
      "Epoch: 39\n",
      "Epoch: 40\n",
      "Done Trainning!\n"
     ]
    }
   ],
   "source": [
    "with tf.Session() as sess:\n",
    "    tf.global_variables_initializer().run()\n",
    "    for epoch in range(n_epochs):\n",
    "        sess.run(iterator.initializer, feed_dict={X_placeholder: X_train, y_placeholder: y_train})\n",
    "        while True:\n",
    "            try:\n",
    "                batch_data = sess.run(next_element)\n",
    "                X_batch = batch_data[0]\n",
    "                y_batch = batch_data[1]\n",
    "                sess.run(training_op, feed_dict={X: X_batch, y:y_batch})\n",
    "            except tf.errors.OutOfRangeError:\n",
    "                break\n",
    "        print(\"Epoch: {}\".format(epoch+1))\n",
    "    print(\"Done Trainning!\")\n",
    "    probabilities = probs.eval(feed_dict={X: X_test})[:,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recall: 80.12\n",
      "Precision: 34.28\n",
      "Accuracy: 61.60\n"
     ]
    }
   ],
   "source": [
    "y_pred = (probabilities > 0.16).astype(int)\n",
    "print('Recall: {:0.2f}'.format(100*recall_score(y_true=y_test, y_pred=y_pred)))\n",
    "print('Precision: {:0.2f}'.format(100*precision_score(y_true=y_test, y_pred=y_pred)))\n",
    "print('Accuracy: {:0.2f}'.format(100*accuracy_score(y_true=y_test, y_pred=y_pred)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
